
// Process simulation output for tables and figures

clear all
set more off

// Set directories to match local machine
local outputdir "replicationFiles/pollutionmodel/output/"
local tabledir "replicationFiles/pollutionmodel/stata_code/"


//import all runs in the outputdir
cd "`outputdir'"
local flist: dir "." files "*.csv" 

//or, define flist directly (e.g. a subset of the directory rather than using all runs
//local flist b p100

// read each csv and save it to a tempfile
foreach fname of local flist {
	cd "`outputdir'"
    import delimited "`fname'", delim(",") varnames(1) case(preserve) asdouble clear  
	//destring to change "NaN" etc to missing 
	foreach var of varlist * {
		if (ustrleft("`:type `var''", 3) == "str" && !inlist("`var'","run","baserun","cardatafile")) {
		destring `var', replace force
	}
	}
	cd "`tabledir'"
	save "temp`fname'.dta", replace
}    
//read them all in:
clear
foreach fname of local flist {
  append using "temp`fname'.dta", force
  erase "temp`fname'.dta"
}

*********************  PROCESS DATA *********************

//global constants 
global pgas 2.2418
global drate 0.0609
global dmg_co 1044.50
global dmg_hc 15046.83
global dmg_nox 35566.44


gsort run t j 
encode run, gen(runenc)

//make time variables, car/truck indices, vehicle indices, labels, age bins
gen ty = t*2 + 1998
gen long typesize = type*10 + size
label define typesizelabel 11 "sm car" 12 "lg car" 21 "sm trk" 22 "lg trk"
label values typesize typesizelabel
gen long typesizemake = mod(j-1,28)+1
gen agebin = -1
replace agebin = 0 if age==0
replace agebin = 1 if(age>=1 & age <=5)
replace agebin = 6 if(age>=6 & age <=10)
replace agebin = 11 if(age>=11 & age <=18)
label define agebinlabel -1 "all" 0 "0-1" 1 "2-11" 6 "12-21" 11 "22-37"
label values agebin agebinlabel
gen ageyears = age*2
gen vintage = ty - ageyears

//change units to annual and readable for display, merge regions
replace vmt = vmt / 2 // vmt in miles per year
gen tau_original_output = tau
replace tau = tau/2000  // tau in thousands per year to match rj
replace agg_co = agg_co / 2000000  //million tons per year
replace agg_hc = agg_hc / 2000000  
replace agg_nox = agg_nox / 2000000  
replace proptax = proptax/2000 // in thousands per year
gen double rj = rj1 / 2  // rj in thousands per year
gen double pj = pj1  //thousands
gen double pscrap = pscrap1  //1 = 100% scrappage
gen double hj = hj1 / 2  // repair cost in thousands per year
gen double d = (d1+d2) / 1000000  //d for vehicles in millions
foreach var of varlist dda* {
	replace `var' = `var'/1000000
}
gen double dwhennew = d1_when_new /1000000 * (d1+d2)/d1  //original demand for a vehicle when it was new
gen double dldt = (d*(type==2)) 
gen double dwhennewldt = (dwhennew*(type==2)) 
gen double M = (M_total_1 + M_total_2) / 2000000 //income in billions
gen double agg_d = (agg_d1 + agg_d2) / 1000000  //millions
egen double agg_d_ldt = sum(d*(type==2)), by(run t)
replace agg_d_ldt = . if !missing(j)
gen double agg_ldtshare = agg_d_ldt/agg_d
gen double agg_gasoline = (agg_gasoline1 + agg_gasoline2) / 2000000000 // billion gallons per year
gen double util = (util1 + util2) / 2000000
gen double profit = (profit1 + profit2) / 2000000
gen double profitrental = (profitrental1 + profitrental2) / 2000000
gen double profit_adj = (profit_adj1 + profit_adj2) / 2000000
gen double profitboth = (profit1 + profit2 + profitrental1 + profitrental2) / 2000000
gen double taxrev = (taxrev_tot_1 + taxrev_tot_2) / 2000000
gen double proptaxrev = (proptaxrev1 + proptaxrev2) / 2000000
egen double agg_vmt = sum(vmt*d/1000), by(run t)   //aggregate vmt in billion miles per year
gen anndamagepercar = damagepercar/2
gen annlcdamagepercar = lcdamagepercar/2

//make aggregate damage variables (all in billions per year to match utility, M, etc.) 
gen double damage = cartot_damage/2000000000
gen double lcdamage = cartot_lcdamage/2000000000
gen double damage_co = cartot_co*$dmg_co /2000000000
gen double damage_hc = cartot_hc*$dmg_hc /2000000000
gen double damage_nox = cartot_nox*$dmg_nox /2000000000

egen double agg_damage = sum(damage), by(run t)
egen double agg_lcdamage = sum(lcdamage), by(run t)
egen double agg_damage_co = sum(damage_co), by(run t)
egen double agg_damage_hc = sum(damage_hc), by(run t)
egen double agg_damage_nox = sum(damage_nox), by(run t)

gen double welfare = util - agg_damage
gen double lcwelfare = util - agg_lcdamage

//divide income up by spending category (billions)
egen double agg_exp_rj = sum(rj*d), by(run t) 
egen double agg_exp_tau = sum(tau*d), by(run t)
egen double agg_exp_gas = sum( $pgas *vmt/mpg1*d/1000), by(run t)
egen double agg_exp_hj = sum(hj*d), by(run t)
egen double agg_exp_costj = sum(cost1*d/2*(age==0)), by(run t)
gen double agg_exp_numeraire = M - agg_exp_rj - agg_exp_tau - agg_exp_gas  //hj and costj come out of industry profits and so the model has already removed them from M


//generate quantity-weighted and baseline-quantity-weighted averages by age and age bin for car-level vars
//first put baseline quantities into policy runs to use for weighting
gen isbase = (run==baserun)
egen double base_d = total(d * isbase), by (baserun t j)
//loop through variables to make weighted averages
foreach carbin of varlist pj rj pscrap tau ageyears {
	egen double avgall_`carbin' = wtmean(`carbin'), weight(d) by(run t)
	egen double avgab_`carbin' = wtmean(`carbin'), weight(d) by(run t agebin)
	replace avgab_`carbin' = avgall_`carbin' if missing(j)
	egen double avga_`carbin' = wtmean(`carbin'), weight(d) by(run t age)
	replace avga_`carbin' = avgall_`carbin' if missing(j)
	drop avgall_`carbin'
	egen double bwavgall_`carbin' = wtmean(`carbin'), weight(base_d) by(run t)
	egen double bwavgab_`carbin' = wtmean(`carbin'), weight(base_d) by(run t agebin)
	replace bwavgab_`carbin' = bwavgall_`carbin' if missing(j)
	egen double bwavga_`carbin' = wtmean(`carbin'), weight(base_d) by(run t age)
	replace bwavga_`carbin' = bwavgall_`carbin' if missing(j)
	drop bwavgall_`carbin'
}

//generate sums by age and age bin for car-level vars
foreach carbin of varlist d dwhennew dldt dwhennewldt damage lcdamage {
	egen double sumall_`carbin' = sum(`carbin'), by(run t)
	egen double sumab_`carbin' = sum(`carbin'), by(run t agebin)
	replace sumab_`carbin' = sumall_`carbin' if missing(j)
	egen double suma_`carbin' = sum(`carbin'), by(run t age)
	replace suma_`carbin' = sumall_`carbin' if missing(j)
	drop sumall_`carbin'
}

//generate variables we want available as differences relative to the baseline
foreach diffvar of varlist util welfare lcwelfare profit profitrental profitboth M proptaxrev agg_damage damage agg_lcdamage lcdamage agg_d agg_vmt d rj dda* tailpipecost sumab_d sumab_damage sumab_lcdamage bwavgab_pj bwavgab_rj bwavgab_pscrap suma_d suma_damage suma_lcdamage bwavga_pj bwavga_rj bwavga_pscrap avga_ageyears {
	egen double baseline_`diffvar' = total(`diffvar' * isbase), by (baserun t j)
	gen double `diffvar'diff = `diffvar' - baseline_`diffvar'
	gen double `diffvar'pctd = 100*(`diffvar' - baseline_`diffvar') / baseline_`diffvar'
	drop baseline_`diffvar'
}

//construct discounted sums of variables (note need to multiply by 2 again since )
gen double dfactor = 1/(1+ $drate )^(t-1)
foreach dsum of varlist util utildiff welfare lcwelfare welfarediff lcwelfarediff profit profitdiff profitrental profitrentaldiff profitboth profitbothdiff M Mdiff taxrev proptaxrev proptaxrevdiff agg_damage agg_lcdamage agg_damagediff agg_lcdamagediff {
	//local dendyears 8 10 11 16 18 30
	local dendyears 10 18 30
	foreach dendyear of local dendyears {
		egen double t`dendyear'dsum`dsum' = total(2*`dsum'*dfactor) if t<=`dendyear', by (run j)
		replace t`dendyear'dsum`dsum' = . if t > 1	
	}
}

//discounted benefit-cost ratios
local dendyears 10 18 30
foreach dendyear of local dendyears {
	gen double t`dendyear'bcratio = t`dendyear'dsumagg_lcdamagediff / t`dendyear'dsumutildiff

}

//**************************************************************************************************************************
//**************************************************************************************************************************

//finished processing data
save quantitative_model_runs, replace


use quantitative_model_runs, clear


//** DATA FOR WELFARE TABLE IN MAIN TEXT AND APPENDIX **//

//aggregates 
export delimited run ty welfare lcwelfare util agg_damage agg_lcdamage M agg_exp_rj agg_exp_tau agg_exp_gas agg_exp_numeraire agg_exp_costj agg_exp_hj profitboth profit_adj taxrev proptaxrev agg_vmt agg_d agg_co agg_hc agg_nox ///
if(missing(j))  using table_aggregates, replace

//differences
export delimited run ty welfarediff lcwelfarediff utildiff agg_damagediff agg_lcdamagediff Mdiff profitbothdiff taxrev proptaxrevdiff agg_vmtdiff agg_ddiff agg_vmtpctd agg_dpctd dfactor if(runmode==1) using table_differences, replace

//discounted sums
local dendyears 10
foreach dendyear of local dendyears {
	export delimited run ty t`dendyear'dsumwelfarediff t`dendyear'dsumlcwelfarediff t`dendyear'dsumutildiff t`dendyear'dsumagg_damagediff t`dendyear'dsumagg_lcdamagediff t`dendyear'dsumMdiff t`dendyear'dsumprofitbothdiff t`dendyear'dsumtaxrev t`dendyear'dsumproptaxrev t`dendyear'bcratio if(t==1 & runmode==1) using table_discsums_to_t`dendyear', replace
	
}


//** FIGURES **//


// Figure 9 panels A-F:


** 9A **
preserve
	keep if run=="p100"
	drop if missing(j)
	collapse (mean) tau [pw=d], by(run ty ageyears)
	list
	replace tau = tau * 1000
	graph bar tau* if (ty==2000), over(ageyears) ///
		ytitle("Annual Fee (US dollars)") b1title("Age") ///
		graphregion(color(white)) bgcolor(white)
	graph export "figIXa_tau_byage_p100_y2000.pdf", replace
restore


** 9B **
// combines quantity and damage change 
preserve
	drop if missing(j)
	collapse (rawsum) lcdamagediff (mean) dpctd [pw=base_d], by(run ty ageyears)
	tw (connected dpctd ageyears if (run=="p100" & ty==2000), yaxis(1) msymbol(Sh)) ///
	   (connected lcdamagediff age if (run=="p100" & ty==2000), yaxis(2) lpattern(dash)), ///
	   		ytitle("Change in damage (US{c S|}billions)", axis(2)) ////
		ytitle("Change in Q (%)") ///
		graphregion(color(white)) ///
		bgcolor(white) ///
		yscale(noline) xscale(noline) ///
				xtitle("Vehicle Age") ///
				legend(order(1 "Quantity change" 2 "Damage change")) ///
		yline(0, lcolor(black))
		
	graph export "figIXb_qpercent_and_damage_byage_p100_y2000.pdf", replace
restore


** 9C **
preserve
	drop if missing(j)
	collapse (mean) tau [pw=d], by(run ty ageyears)
	*keep if ageyears==0
	replace tau = tau * 2 // (tax is paid with the first year's rental, so one time amount is 2x the "annual" within the new age category)
	replace tau = tau * 1000
	list if (run=="pn100" & ty==2000)
	graph bar tau* if (run=="pn100" & ty==2000), ///
		over(ageyears) ///
		ytitle("Annual Fee (US dollars)") ///
		graphregion(color(white)) bgcolor(white)
	graph export "figIXc_tau_byage_pn100_y2000.pdf", replace
restore


** 9D **
preserve
	drop if missing(j)
	collapse (rawsum) lcdamagediff (mean) dpctd [pw=base_d], by(run ty ageyears)
	tw (connected dpctd ageyears if (run=="pn100" & ty==2000), yaxis(1) msymbol(Sh)) ///
	   (connected lcdamagediff age if (run=="pn100" & ty==2000), yaxis(2) ylabel(-4(1)1.39, axis(2)) lpattern(dash)), ///
	   		ytitle("Change in damage (US{c S|}billions)", axis(2)) ////
		ytitle("Change in Q (%)") ///
		graphregion(color(white)) ///
		bgcolor(white) ///
		yscale(noline) xscale(noline) ///
				xtitle("Vehicle Age") ///
				legend(order(1 "Quantity change" 2 "Damage change")) ///
		yline(0, lcolor(black))
		
	graph export "figIXd_qpercent_and_damage_byage_pn100_y2000.pdf", replace
restore


** 9E **
// tau by age, flat property tax
preserve
	drop if missing(j)
	collapse (mean) tau [pw=d], by(run ty ageyears)
	replace tau = tau * 1000
	graph bar tau* if (run=="pf100" & ty==2000), ///
		over(ageyears) ///
		ytitle("Annual Fee Change (US dollars)") b1title("Age") ///
		graphregion(color(white)) bgcolor(white)
	graph export "figIXe_tau_byage_pf100_y2000.pdf", replace
restore


** 9F **
preserve
	drop if missing(j)
	collapse (rawsum) lcdamagediff (mean) dpctd [pw=base_d], by(run ty ageyears)
	tw (connected dpctd ageyears if (run=="pf100" & ty==2000), yaxis(1) ylabel(-2(1)1) msymbol(Sh)) ///
	   (connected lcdamagediff age if (run=="pf100" & ty==2000), yaxis(2) ylabel(-0.2(0.1).092, axis(2)) lpattern(dash)), ///
	   		ytitle("Change in damage (US{c S|}billions)", axis(2)) ////
		ytitle("Change in Q (%)") ///
		graphregion(color(white)) ///
		bgcolor(white) ///
		yscale(noline) xscale(noline) ///
				xtitle("Vehicle Age") ///
				legend(order(1 "Quantity change" 2 "Damage change")) ///
		yline(0, lcolor(black))
		
	graph export "figIXf_qpercent_and_damage_byage_pf100_y2000.pdf", replace
restore



//******** APPENDIX FIGURES ********//


// model fit graphics

// first run on Colorado data to get the original data points

preserve
	u "../../../dataSTATA/combined/combined_smogcheck_colorado.dta", clear
	keep if age >= 4
	* age in 2-year categories
	replace age = 4  if age == 5
	replace age = 6  if age == 7
	replace age = 8  if age == 9
	replace age = 10 if age == 11
	replace age = 12 if age == 13
	replace age = 14 if age == 15
	replace age = 16 if age == 17
	replace age = 18 if age == 19
	replace age = 20 if age == 21
	replace age = 22 if age == 23
	replace age = 24 if age == 25
	replace age = 26 if age == 27
	replace age = 28 if age == 29
	replace age = 30 if age >= 31 & age < .
	g       group = 1 if model_year >= 2004 & model_year <= 2009
	replace group = 2 if model_year >= 1998 & model_year <= 2003
	replace group = 3 if model_year >= 1992 & model_year <= 1997
	replace group = 4 if model_year <= 1991 
	* exclude miniscule cell sizes
	keep if (group == 1 & age <= 10) | (group == 2 & age <= 16) | (group == 3 & age <= 22) | (group == 4 & age >= 6)
	collapse (mean) emissions_* annualVmt , by(group age v_veh_type) fast
	
	gen type = 1*(v_veh_type=="P") + 2*(v_veh_type=="T")
	drop v_veh_type
	save "type_age_vintage_fit_colorado_data", replace
restore
	
// now use simulation output
preserve
	keep if run=="b" & !missing(j)

	//collapse model sample to match the colorado data
	keep if ty<=2014

	drop age
	rename ageyears age
	rename vintage model_year
	rename phi_co emissions_used_CO
	rename phi_hc emissions_used_HC
	rename phi_nox emissions_used_NOX

	gen     group = 1 if model_year >= 2004 & model_year <= 2009
	replace group = 2 if model_year >= 1998 & model_year <= 2003
	replace group = 3 if model_year >= 1992 & model_year <= 1997
	replace group = 4 if model_year <= 1991 

	* exclude miniscule cell sizes
	keep if age >= 4 & age<=30
	keep if (group == 1 & age <= 10) | (group == 2 & age <= 16) | (group == 3 & age <= 22) | (group == 4 & age >= 6)
	keep if model_year>=1982
	collapse (mean) emissions_* [pw=d], by(group age type)

	rename emissions_used_CO sim_emissions_used_CO
	rename emissions_used_HC sim_emissions_used_HC
	rename emissions_used_NOX sim_emissions_used_NOX

	//merge 1:1 group age using age_vintage_targets_colorado_data
	merge 1:1 group age type using type_age_vintage_fit_colorado_data

	tw (scatter emissions_used_CO sim_emissions_used_CO) (function y=x, range(.8 62) lpattern(shortdash) lcolor(black)), ///
	yscale(noline log) xscale(noline log) ylab(1 2 4 8 16 32 64) xlab(1 2 4 8 16 32 64) ///
			leg(off) graphr(color(white))				 ///
			ytit("Grams per Mile, Colorado Dataset")		 ///
			xtit("Grams per Mile, Quantitative Model")		///
			xsize(4.1)
			graph export "figA9a_modelfit_45CO.eps", replace

			
	tw (scatter emissions_used_HC sim_emissions_used_HC) (function y=x, range(.05 4.5) lpattern(shortdash) lcolor(black)), ///
	yscale(noline log) xscale(noline log) ylab(0.05 0.1 0.2 0.4 0.8 1.6 3.2) xlab(0.05 0.1 0.2 0.4 0.8 1.6 3.2) ///
			leg(off) graphr(color(white))				 ///
			ytit("Grams per Mile, Colorado Dataset")		 ///
			xtit("Grams per Mile, Quantitative Model")		///
			xsize(4.1)
			graph export "figA9b_modelfit_45HC.eps", replace

// 	tw (scatter emissions_used_NOX sim_emissions_used_NOX) (function y=x, range(.16 4) lpattern(shortdash) lcolor(black)), ///
// 	yscale(noline log) xscale(noline log) ylab(0.25 0.5 1 2 4) xlab(0.25 0.5 1 2 4) ///
// 			leg(off) graphr(color(white))				 ///
// 			ytit("Grams per Mile, Colorado Dataset")		 ///
// 			xtit("Grams per Mile, Quantitative Model")		///
// 			xsize(4.1)
// 			graph export "modelfit_45NOX.eps", replace

restore
		
// replicate CDFs using model output	
preserve
	keep if run=="b" & !missing(j)
	keep if ty == 2014
	drop age
	rename ageyears age
	keep if age<=30

	collapse (rawsum) base_d (mean) phi_co phi_hc phi_nox vmt mpg1 [pw=base_d], by(age)
	rename phi_co emissions_used_CO
	rename phi_hc emissions_used_HC
	rename phi_nox emissions_used_NOX
	gen annualVmt = vmt/1000
	gen emissions_used_CO2 = 1.15 * 1/mpg1 * 8887 //15% to approximately correct real world versus CAFE test cycle 

	g       vmtPerPeriod = annualVmt * 2 
	g total_CO  = emissions_used_CO  * vmtPerPeriod * base_d
	g total_HC  = emissions_used_HC  * vmtPerPeriod * base_d
	g total_NOX = emissions_used_NOX * vmtPerPeriod * base_d
	g total_CO2 = emissions_used_CO2 * vmtPerPeriod * base_d
	g total_vmt = vmtPerPeriod * base_d
	collapse (sum) total_*, by(age) fast
	egen Ttotal_CO  = total(total_CO)
	egen Ttotal_HC  = total(total_HC)
	egen Ttotal_NOX = total(total_NOX)
	egen Ttotal_CO2 = total(total_CO2)
	egen Ttotal_vmt = total(total_vmt)
	g sh_CO  = total_CO  / Ttotal_CO
	g sh_HC  = total_HC  / Ttotal_HC
	g sh_NOX = total_NOX / Ttotal_NOX
	g sh_CO2 = total_CO2 / Ttotal_CO2
	g sh_vmt = total_vmt / Ttotal_vmt
	g cdf_CO  = sum(sh_CO)
	g cdf_HC  = sum(sh_HC)
	g cdf_NOX = sum(sh_NOX)
	g cdf_CO2 = sum(sh_CO2)
	g cdf_vmt = sum(sh_vmt)

	keep if age>=4

	tw  (connected cdf_CO age, msymb(C)) ///
		(connected cdf_HC age, msymb(S) lpattern(dash)) ///
		(connected cdf_NOX age, msymb(Dh) lpattern(shortdash)) ///
		(line cdf_CO2 age, lpattern(longdash)), ///
		graphr(color(white)) ///
		yscale(noline) ///
		ytit("Cumulative share of lifetime pollution") ///
		xtit("Age (Years)") ///
		xline(10 15) ///
		ylab(0 0.25 0.5 0.75 1) ///
		xlab(4 8 12 16 20 24 30) ///
		legend(order(1 "Carbon monoxide (CO)" 2 "Hydrocarbons (HC)" 3 "Nitrogen oxides (NOx)" 4 "Carbon dioxide (CO2)")) 
		
		graph export "figA10a_cumulative_quant_model.eps", replace

restore



erase "type_age_vintage_fit_colorado_data.dta"


//*********** APPENDIX TABLE IMPACTS BY INCOME GROUP ****************

preserve

// reg fee per veh (multiply by avg choice occasions in each income group to get per hh)
keep if inlist(run,"b","p100","p100rn","pn100", "pf100")

drop dda*pctd
local bins 1 2 3 4 5 6 7 8 9
foreach bin of local bins {
	replace dda`bin' = ddao`bin' if missing(j)
	replace dda`bin'diff = ddao`bin'diff if missing(j)	
	drop ddao`bin' ddao`bin'diff
	rename dda`bin'diff ddadiff`bin'
}

keep run t j ty ageyears proptax tau rj rjdiff dda*

foreach var of varlist proptax tau rj rjdiff {
	replace `var' = 0 if missing(j)
}
replace j = 0 if missing(j)

reshape long dda ddadiff, i(run t j) j(incomebin)
gen ddabase = dda - ddadiff
gen proptaxbase = 1000 * proptax
gen proptaxpol = 1000 * (proptax + tau) 
gen deltaproptax = 1000 * tau
gen deltaproptaxrj = 1000 * (tau + rjdiff)

gen paidbaseq = ddabase*proptaxpol
gen paidpolq = dda*proptaxpol

//make tables for computing discounted sums (effects here are divided by 20 for annualized values in final table)
log using tableA9_income_bin_data, replace
table run incomebin t [pw=ddabase] if t<=10, statistic(mean proptaxpol) 
table run incomebin t [pw=dda] if t<=10, statistic(mean proptaxpol) 
log close


// make graph of age distribution by income bin
keep if run=="b" & ty==2000
gen plotbin = 1 if incomebin==1 | incomebin==2
replace plotbin = 2 if incomebin==9
drop if missing(plotbin)
collapse (sum) ddabase, by(plotbin ageyears)
egen choices = total(ddabase), by(plotbin)
egen cars = total(ddabase) if !missing(ageyears), by(plotbin)
gen ddabasefrac = ddabase/choices/2  // divide by 2 to make it fraction at a single year rather than fraction in a 2-year age bin
gen ddabasefraccars = ddabase/cars/2

twoway (bar ddabasefraccars ageyears if plotbin==1, bcolor(red%30) barw(1.3)) ///
(bar ddabasefraccars ageyears if plotbin==2, bcolor(green%30) barw(1.3)), ///
xscale(range(0 36)) xlab(0 6 12 18 24 30 36) ytitle("Fraction of vehicles at age") xtitle("Age") legend(order(2 "Income >80k" 1 "Income <20k")) graphregion(color(white)) bgcolor(white)
graph export figA11c_baseline_age_by_income.pdf, replace
restore

